#2016.1.26
#coding: utf-8
import re

# Maps a host name (bytes) to the compiled pattern that `match` searches for
# in the part of the URL after the first "/" following the host.
#
# Patterns are applied with `search`, so they may match anywhere in the path.
# A pattern of b".+" therefore accepts any path containing at least one
# non-newline character.
#
# NOTE(review): several patterns look looser than probably intended, e.g.
# b".catid+" is "any character, 'cati', then one or more 'd'", and
# b"[index.]+" is a character class (one or more of i/n/d/e/x/.), not the
# literal text "index.". They are kept unchanged here — confirm intent before
# tightening them.
xtable = {
    b"www.amazon.cn": re.compile(b"dp/[A-Z0-9]+"),
    b"www.escentual.com": re.compile(b".+"),
    b"www.luisaviaroma.com": re.compile(b"index.aspx.*"),
    b"www.wiggle.cn": re.compile(b".+"),
    b"www.jomashop.com": re.compile(b"\\.html"),
    b"www.amazon.ca": re.compile(b"product/[A-Z0-9]+"),
    b"www.6pm.com": re.compile(b"product/\\d+"),
    b"www1.macys.com": re.compile(b"product/.+"),
    b"www.finishline.com": re.compile(b"product\\?.+"),
    b"www.amazon.co.uk": re.compile(b"(?:dp|product)/[A-Z0-9]+"),
    b"www.drugstore.com": re.compile(b".catid+"),
    b"www.carters.com": re.compile(b".carters+"),
    b"www.sears.com": re.compile(b".+"),
    b"www.amazon.co.jp": re.compile(b"(?:dp|product)/[A-Z0-9]+"),
    b"www.store.nba.com": re.compile(b".+"),
    b"www.lookfantastic.com": re.compile(b".list+|(?:[0-9]+.html)+"),
    b"www.rei.com": re.compile(b"product/[0-9]+"),
    b"www.newbalance.com.cn": re.compile(b"[index.]+"),
    b"www.swarovski.com.cn": re.compile(b".product/+"),
    b"www.mdreams.com": re.compile(b".+"),
    b"www.converse.com.cn": re.compile(b".item+"),
    b"www.microsoftstore.com.cn": re.compile(b"/p/+"),
    b"www.godiva.com": re.compile(b".[0-9]+"),
    b"www.kiehls.com.br": re.compile(b".+"),
    b"www.disneystore.com": re.compile(b"./[0-9]+"),
    b"www.groupon.com": re.compile(b"deals/+"),
    b"www.footlocker.com": re.compile(b"product/+"),
    b"www.campsaver.com": re.compile(b".+"),
    b"www.joesnewbalanceoutlet.com": re.compile(b".?style+"),
    b"www.tiffany.cn": re.compile(b".+"),
    b"www.amazon.fr": re.compile(b"(?:dp|product)/[A-Z0-9]+"),
    # This key used to be listed twice with the same pattern; one entry kept.
    b"www.rakuten.com": re.compile(b"prod/+"),
}

def urlsplit(url):
    """Split a URL (bytes) into its host and the remainder after the host.

    A leading ``http://`` or ``https://`` scheme is removed first; any other
    input is split as-is.

    Args:
        url: The URL as a bytes object.

    Returns:
        The list produced by splitting on the first ``/``: ``[host, path]``,
        or just ``[host]`` when nothing but the host is present. ``path``
        does not include the leading slash.
    """
    for scheme in (b"http://", b"https://"):
        if url.startswith(scheme):
            url = url[len(scheme):]
            break
    return url.split(b'/', 1)

def match(url):
    """Search a URL's path for the pattern registered for its host.

    Args:
        url: The URL as a bytes object.

    Returns:
        The match object from searching the host's ``xtable`` pattern in the
        path, or ``None`` when the pattern is not found.

    Raises:
        KeyError: If the URL's host has no entry in ``xtable``.
    """
    parts = urlsplit(url)
    host = parts[0]
    # A URL with nothing after the host yields a single-element list; treat
    # the missing path as empty instead of failing on tuple unpacking.
    path = parts[1] if len(parts) > 1 else b""
    return xtable[host].search(path)

def test():
    """Run `match` over one sample URL for each host in ``xtable``.

    Prints the result of `match` for every sample URL (a match object, or
    ``None`` when the host's pattern is not found), then repeats all of the
    lookups ten more times with the results discarded.
    """
    urls = [b'http://www.amazon.cn/dp/B00UKPMA44/ref=gwgfloorv1_CE_a_0?pf_rd_p=267224552&pf_rd_s=desktop-6',
            b'http://www.escentual.com/hair-care/bumblefullpotential004/',
            b'http://www.luisaviaroma.com/index.aspx?#ItemSrv.ashx|SeasonId=63I&CollectionId=G85&ItemId=17',
            b'http://www.wiggle.cn/eastway-emitter-r4-tiagra-2016/',
            b'http://www.jomashop.com/ferragamo-8050445769040.html',
            b'http://www.amazon.ca/gp/product/B00EAKKOKC/ref=br_bsl_pdt-3/179-1951858-8163852?pf_rd_m=A3DWYIK6Y9EEQB',
            b'http://www.6pm.com/product/8642433/color/518509',
            b'http://www1.macys.com/shop/product/scalamandre-tropez-4-piece-hostess-set?ID=1137462&CategoryID=7919',
            b'http://www.finishline.com/store/product?A=5587&categoryId=cat303193&productId=prod781512',
            b'http://www.amazon.co.uk/gp/product/B013F9YDZU?ref_=gbps_img_s-3_3127_619facb5&smid=A3P5ROKL5A1OLE',
            b'http://www.drugstore.com/replens-long-lasting-vaginal-moisturizer-pre-filled-applicators/qxp12711?catid=181800',
            b'http://www.carters.com/carters-baby-girl-baby-essentials/V_115G040.html?cgid=carters-baby-girl-baby-essentials&dwvar_V__115G040_color=Color&dwvar_V__115G040_size=3M#start=46&cgid=carters-baby-girl-baby-essentials',
            b'http://www.sears.com/craftsman-230-piece-mechanics-tool-set/p-00950230000P?prdNo=3&blockNo=3&blockType=G3',
            b'http://www.amazon.co.jp/gp/product/B00T90F0WU/ref=s9_ri_gw_g421_i3?pf_rd_m=AN1VRQENFRJN5&pf_rd_s=desktop-1&pf_rd_r=1FJ6VE0FWK2J89WWFKGD&pf_rd_t=36701&pf_rd_p=263612829&pf_rd_i=desktop',
            b'http://www.store.nba.com/Hats_adidas/Mitchell_And_Ness_Chicago_Bulls_Tri-Pop_Snapback_Hat',
            b'http://www.lookfantastic.com/magnitone-full-monty-vibra-sonictm-face-and-body-brush-electric-blue/11166759.html',
            b'http://www.rei.com/product/767613/new-england-7mm-accessory-cord-package-of-30-feet',
            b'http://www.newbalance.com.cn/index.php?s=/Home/Index/productList/tid/55.html',
            b'http://www.swarovski.com.cn/Web_CN/zh/5074329/product/Better_Butterfly_%E9%A1%B9%E9%93%BE.html',
            b'http://www.mdreams.com/melissa-system-love-now',
            b'http://www.converse.com.cn/all_star/12484C003/item.htm?iid=hpnvw06012015-02',
            b'http://www.microsoftstore.com.cn/%E7%B1%BB%E5%88%AB/Xbox/Xbox-One%E5%AE%B6%E5%BA%AD%E5%A8%B1%E4%B9%90%E6%B8%B8%E6%88%8F%E6%9C%BA/p/MIC0777?Icid=HotProduct_rank1_Xboxone_20151231',
            b'http://www.godiva.com/6pc-milk-chocolate-lovers-truffle-flight/78352.html',
            b'http://www.kiehls.com.br/cuidados-com-o-rosto/por-categoria/limpadores-esfoliantes-e-mascaras/rare-earth-deep-pore-daily-cleanser',
            b'http://www.disneystore.com/mickey-mouse-plush-blankie-for-baby-personalizable/mp/1393641/1014307/',
            b'http://www.groupon.com/deals/gg-hotel-new-york-comforter-set-1',
            b'http://www.footlocker.com/product/model:208261/sku:42964610/jordan-true-flight-mens/red/black/?cm=',
            b'http://www.campsaver.com/hi-loft-down-sweater-hoody-boy-s',
            b'http://www.joesnewbalanceoutlet.com/detail.asp?style=WLSB450MLT&size=&width=&category=all&selection=kapp&sort=ADDED&shoeLast=&price=',
            b'http://www.tiffany.cn/jewelry/necklaces-pendants/return-to-tiffany-heart-key-pendant-30971531?fromGrid=1&search_params=p+1-n+10000-c+288153-s+5-r+-t+-ni+1-x+-lr+-hr+-ri+-mi+-pp+300+6&search=0&origin=browse&searchkeyword=&trackpdp=bg&fromcid=288153#p+1-n+10000-c+288153-s+5-r+-t+-ni+1-x+-pu+-f+false+1-lr+-hr+-ri+-mi+-pp+300%2B6',
            b'http://www.amazon.fr/gp/product/B013UQOX62/ref=br_asw_pdt-3/276-2383077-6658614?pf_rd_m=A1X6FK5RDHNB96&pf_rd_s=desktop-1&pf_rd_r=1Y0READ8RFMQVMPEB04K&pf_rd_t=36701&pf_rd_p=782594987&pf_rd_i=desktop',
            b'http://www.rakuten.com/prod/secret-garden/252942368.html?listingid=354092914&omadtrack=recs',
            ]

    # Show what each sample URL produces.
    for result in map(match, urls):
        print(result)

    # Repeat every lookup with the results discarded.
    # NOTE(review): presumably a crude timing loop — confirm it is still wanted.
    for _ in range(10):
        for u in urls:
            match(u)


# Run the samples only when executed as a script, so that importing this
# module for `match` does not print or do extra work.
if __name__ == "__main__":
    test()
